--keep to subset genotypes?Tried GxE test using robust variance estimates before realizing it didn’t fit any sort of relatedness matrix. Could fit PCs instead?
Rserve via conda (otherwise, would have to run in an interactive session on Lewis)# Default PLINK module doesn't allow R plugins
module load plink/plink-high-contig-1.90p
R CMD Rserve --no-save
plink --bfile data/derived_data/robust_joint_gxe/test --threads 12 --allow-no-sex --cow --double-id --covar data/derived_data/robust_joint_gxe/test.cov --R source_functions/robust-joint-int-plugin.R
gxe_gwas <-
purrr::map2_dfr(.x = rep(c("temp", "day_length"), each = 4),
.y = rep(c(2016, 2017, 2018, 2019), times = 2),
~ read_table2(here::here(glue("data/derived_data/gxe_gwas/{.x}/{.y}/result.assoc.txt"))) %>%
rename(pos = ps) %>%
mutate(var = .x,
year = .y,
adj_p = gcif(.,
adjust_p = TRUE),
neglog10p = -log10(adj_p),
q = qvalue::qvalue(adj_p)$qvalues,
neglog10q = -log10(q)))
purrr::map(.x = c(2016, 2017, 2018, 2019),
~ gxe_gwas %>%
filter(var == "day_length") %>%
filter(year == .x) %>%
filter(!is.na(beta)) %>%
mutate(b = round(beta, digits = 7),
se = round(se, digits = 7)) %>%
select(rs, !!rlang::sym(glue("b_{.x}")) := b, !!rlang::sym(glue("se_{.x}")) := se)) %>%
purrr::reduce(left_join) %>%
mutate_if(is.numeric, as.character) %>%
write_tsv(here::here("data/derived_data/metasoft/day_length/metasoft_in.day_length.txt"),
col_names = FALSE)
purrr::map(.x = c(2016, 2017, 2018, 2019),
~ gxe_gwas %>%
filter(var == "temp") %>%
filter(year == .x) %>%
filter(!is.na(beta)) %>%
mutate(b = round(beta, digits = 7),
se = round(se, digits = 7)) %>%
select(rs, !!rlang::sym(glue("b_{.x}")) := b, !!rlang::sym(glue("se_{.x}")) := se)) %>%
purrr::reduce(left_join) %>%
mutate_if(is.numeric, as.character) %>%
write_tsv(here::here("data/derived_data/metasoft/temp/metasoft_in.temp.txt"),
col_names = FALSE)
gxe_metasoft <-
c("temp", "day_length") %>%
purrr::set_names() %>%
purrr::map_dfr(~ read_table2(here::here(glue("data/derived_data/metasoft/{.x}/metasoft_out.{.x}_adj.txt")),
col_types = cols(.default = "d", RSID = "c")) %>%
janitor::clean_names() %>%
select(-pvalue_be, -contains("fe")) %>%
left_join(read_table2(here::here(glue("data/derived_data/metasoft/{.x}/metasoft_out.{.x}_adj.txt")),
skip = 1,
col_types = "c---------------dddddddd",
col_names = c("rsid", "p16", "p17", "p18", "p19", "m16", "m17", "m18", "m19"))) %>%
mutate(chr = as.numeric(str_extract(rsid, "[[:digit:]]{1,2}(?=:)")),
pos = as.numeric(str_extract(rsid, "(?<=[[:digit:]]{1,2}:)[[:digit:]]+")),
neglog10q_re2 = -log10(qvalue::qvalue(pvalue_re2)$qvalues)),
.id = "var")
{farm_id + year + calving_season + age_group + toxic_fescue + score_group} farm_id but no score_group)score_group back in?score_group back inqq <-
gxe_gwas %>%
select(var, year, p_wald, adj_p) %>%
nest(-var, -year) %>%
mutate(pre = purrr::pmap(list(x = data, y = year, z = var),
.f = function(x, y, z) {
x %>%
pull(p_wald) %>%
ggqq() +
labs(title = glue("{y} {z} pre-GC"))
}),
post = purrr::pmap(list(x = data, y = year, z = var),
.f = function(x, y, z) {
x %>%
pull(adj_p) %>%
ggqq() +
labs(title = glue("{y} {z} post-GC"))
})) %>%
select(-data)
## Warning: All elements of `...` must be named.
## Did you want `data = c(p_wald, adj_p)`?
qq %>%
filter(year == 2016 & var == "temp") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2016 & var == "temp") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2016 & var == "day_length") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2016 & var == "day_length") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2017 & var == "temp") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2017 & var == "temp") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2017 & var == "day_length") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2017 & var == "day_length") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2018 & var == "temp") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2018 & var == "temp") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2018 & var == "day_length") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2018 & var == "day_length") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2019 & var == "temp") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2019 & var == "temp") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2019 & var == "day_length") %>%
pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
qq %>%
filter(year == 2019 & var == "day_length") %>%
pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]
gxe_gwas %>%
select(var, year, p_wald, adj_p) %>%
group_by(var, year) %>%
nest() %>%
mutate(`Pre-GC inflation factor` = purrr::map_dbl(.x = data,
~ .x %>%
gcif())) %>%
rename(`Variable` = var,
Year = year) %>%
select(-data) %>%
arrange(Year)
## select: dropped 14 variables (chr, rs, pos, n_miss, allele1, …)
## group_by: 2 grouping variables (var, year)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## mutate (grouped): new variable 'Pre-GC inflation factor' (double) with 8 unique values and 0% NA
## rename: renamed 2 variables (Variable, Year)
## select: dropped one variable (data)
gxe_gwas %>%
filter(var == "day_length") %>%
filter(0.1 > adj_p) %>%
hair_manhattan(y_var = neglog10p,
y_lab = "-log_10(Adj. p-value)",
plot_title = "Day length",
color1 = "#b9aa97",
color2 = "#7e756d",
facet = TRUE,
nfacets = 4,
desc = year)
## filter: removed 2,872,013 rows (50%), 2,872,013 rows remaining
## filter: removed 2,579,212 rows (90%), 292,801 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 292,801 (includes duplicates)
## > =========
## > rows total 292,801
## mutate: new variable 'BPcum' (double) with 244,919 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 292,801 (includes duplicates)
## > =========
## > rows total 292,801
## mutate: new variable 'BPcum' (double) with 244,919 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA
## rename: renamed one variable (facetvar)
gxe_gwas %>%
filter(var == "temp") %>%
filter(0.1 > adj_p) %>%
hair_manhattan(y_var = neglog10p,
y_lab = "-log_10(Adj. p-value)",
plot_title = "Temperature",
color1 = "#b9aa97",
color2 = "#7e756d",
facet = TRUE,
nfacets = 4,
desc = year)
## filter: removed 2,872,013 rows (50%), 2,872,013 rows remaining
## filter: removed 2,586,541 rows (90%), 285,472 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 285,472 (includes duplicates)
## > =========
## > rows total 285,472
## mutate: new variable 'BPcum' (double) with 243,838 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 285,472 (includes duplicates)
## > =========
## > rows total 285,472
## mutate: new variable 'BPcum' (double) with 243,838 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA
## rename: renamed one variable (facetvar)
srun java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/day_length/metasoft_in.day_length.txt -output data/derived_data/metasoft/day_length/metasoft_out.day_length.txt -mvalue -mvalue_p_thres 0.01 -log data/derived_data/metasoft/day_length/metasoft.day_length.log
-lambda_mean 1.401986-lambda_hetero 0.332300:srun java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/day_length/metasoft_in.day_length.txt -output data/derived_data/metasoft/day_length/metasoft_out.day_length_adj.txt -mvalue -mvalue_p_thres 0.01 -lambda_mean 1.401986 -lambda_hetero 0.332300 -log data/derived_data/metasoft/day_length/metasoft.day_length_adj.log
srun --account animalsci java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/temp/metasoft_in.temp.txt -output data/derived_data/metasoft/temp/metasoft_out.temp.txt -mvalue -mvalue_p_thres 0.01 -log data/derived_data/metasoft/temp/metasoft.temp.log
-lambda_mean 1.101779-lambda_hetero 0.470108:srun --account animalsci java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/temp/metasoft_in.temp.txt -output data/derived_data/metasoft/temp/metasoft_out.temp_adj.txt -mvalue -mvalue_p_thres 0.01 -lambda_mean 1.101779 -lambda_hetero 0.470108 -log data/derived_data/metasoft/temp/metasoft.temp_adj.log
ggqq(pvector = gxe_metasoft %>%
filter(var == "day_length") %>%
filter(!is.na(pvalue_re2)) %>%
pull(pvalue_re2)) +
ggtitle("Metasoft RE2 p-valules (day length)")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 178 rows (<1%), 691,756 rows remaining
ggqq(pvector = gxe_metasoft %>%
filter(var == "temp") %>%
filter(!is.na(pvalue_re2)) %>%
pull(pvalue_re2)) +
ggtitle("Metasoft RE2 p-valules (temperature)")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 178 rows (<1%), 691,756 rows remaining
gxe_metasoft %>%
filter(var == "day_length") %>%
filter(0.5 > pvalue_re2) %>%
hair_manhattan(y_var = neglog10q_re2,
color1 = "#b9aa97",
color2 = "#7e756d",
y_lab = latex2exp::TeX("$-log_{10}(RE2 q-value)$"),
plot_title = "Day length",
sigline = q_fdr1) +
geom_hline(yintercept = q_fdr5,
color = "blue")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 350,045 rows (51%), 341,889 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 341,889 (includes duplicates)
## > =========
## > rows total 341,889
## mutate: new variable 'BPcum' (double) with 341,889 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 341,889 (includes duplicates)
## > =========
## > rows total 341,889
## mutate: new variable 'BPcum' (double) with 341,889 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA
gxe_metasoft %>%
filter(var == "temp") %>%
filter(0.5 > pvalue_re2) %>%
hair_manhattan(y_var = neglog10q_re2,
color1 = "#b9aa97",
color2 = "#7e756d",
y_lab = latex2exp::TeX("$-log_{10}(RE2 q-value)$"),
plot_title = "Temperature",
sigline = q_fdr1) +
geom_hline(yintercept = q_fdr5,
color = "blue")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 352,399 rows (51%), 339,535 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 339,535 (includes duplicates)
## > =========
## > rows total 339,535
## mutate: new variable 'BPcum' (double) with 339,535 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 339,535 (includes duplicates)
## > =========
## > rows total 339,535
## mutate: new variable 'BPcum' (double) with 339,535 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA